# Setup: fields are separated by ";" or "#" (with any surrounding spaces),
# arrays are traversed in ascending numeric index order, and the CLDR
# annotations are loaded into cldr[] before any input is read.
BEGIN {
    FS = " *[;#] *"
    PROCINFO["sorted_in"] = "@ind_num_asc"
    readCLDR()
}

# Empty lines carry no data.
/^$/ {
    next
}

# "# group: <name>": remember the name under the next group number.
/^# group/ {
    group = substr($2, index($2, ":") + 2)
    g++
    groups[g] = group
    # sg is not reset here, so subgroup numbers keep counting across groups.
}

# "# subgroup: <name>": remember the name under the current group.
/^# subgroup/ {
    subgroup = substr($2, index($2, ":") + 2)
    sg++
    subgroups[g][sg] = subgroup
}

# Process one fully-qualified emoji record, for example:
#
#   1F600 ; fully-qualified # 😀 E2.0 grinning face
#
# The input file contains all possible combinations, but we only want to store
# the emojis without gender/tone variants and then record *if* it supports
# setting this. Variant records therefore only update tones[] for their base
# name and are otherwise skipped.
#
# Writes: namemap[name] (name -> entry number), tones[entry] ("true"/"false")
# and emojis[entry] (one Go struct literal; TONE and GENDER are placeholders
# that the END block substitutes).
$2 == "fully-qualified" {
    codepoints = $1

    # Take the name from $0 rather than $3: "#" is also a field separator, so
    # for "keycap: #" the third field is empty. Everything after the first "#"
    # is "<emoji> E<version> <name>"; the name starts after the first digit
    # that is followed by a space.
    hash = index($0, "#")
    name = hash ? substr($0, hash + 1) : ""
    name = substr(name, match(name, /[0-9] /) + 2)   # 😀 E2.0 grinning face

    tone   = "false"
    gender = 0

    # Remove the variation markers if this is an emoji with variants and
    # remember it supports the variants.
    if (index(name, "skin tone") > 0) {
        tone = "true"
        name = gensub(/(light|medium-light|medium|medium-dark|dark) skin tone(, )?/, "", 1, name)
    }
    name = gensub("[ :]*$", "", "g", name)
    if (!(name in namemap)) {
        namemap[name] = ++idx
        tones[idx]    = "false"
    }

    # Always address the entry that belongs to this name. The running counter
    # idx only equals it when the name was the most recently added one.
    id = namemap[name]
    if (tone == "true")
        tones[id] = "true"

    delete cp
    cpn = 0
    ncp = split(codepoints, cp2, " ")
    for (i = 1; i <= ncp; i++) {
        switch (cp2[i]) {
        case "200D": break # Ignore
        case /1F3FB|1F3FC|1F3FD|1F3FE|1F3FF/:
            tone = "true"
            break
        # Old/classic gendered emoji. A "person" emoji is combined with
        # "female sign" or "male sign" to make an explicitly gendered one.
        case /2640|2642/:
            # split() numbers elements from 1: a sign in first position is the
            # emoji itself (the stand-alone female/male sign), not a modifier.
            if (i == 1)
                cp[cpn++] = cp2[i]
            else
                gender = 1  # GenderSign
            break
        default:
            cp[cpn++] = cp2[i]
        }
    }
    codepoints2 = ""
    for (i = 0; i < cpn; i++) {
        codepoints2 = codepoints2 "0x" tolower(cp[i]) ", "
    }

    # Tone and gender variants are not emitted as separate entries.
    if (tone != "false" || gender > 0)
        next

    # TODO: gender is detected above but not recorded anywhere yet; the END
    # block currently always substitutes 0 for GENDER. Ideas:
    #
    # if (cp in emojis) {
    #   emojis[cp]["gender"] = gender
    #   emojis[cp]["tone"]   = tone
    # } else
    #     emojis[cp] = [...]
    #
    # OR
    #
    # if (cp in emojis) {
    #    if (gender != 0)     emojis[cp] = gensub(/GENDER/, gender, "g", emojis[cp])
    #    if (tone != "false") emojis[cp] = gensub(/TONE/,   tone,   "g", emojis[cp])
    # }

    cldrName = cldr[tochr(cp)]
    if (cldrName == "")
        cldrName = "[]string(nil)" # TODO: just nil
    else
        cldrName = "[]string{" cldrName "}"

    emojis[id] = sprintf("\t{[]rune{%s}, \"%s\", %d, %d, %s, TONE, GENDER},",
        codepoints2, name, g-1, sg-1, cldrName)
}

# Write the generated Go source to stdout: the group constants and lookup
# table, the subgroup constants and lookup table, and finally the emoji list.
END {
    print "// Code generated by gen.zsh; DO NOT EDIT\n\npackage unidata\n"

    # Group constants; only the entry with index 1 carries the iota expression.
    print "// Emoji groups.\nconst ("
    for (gi in groups) {
        suffix = ""
        if (gi == 1)
            suffix = " = EmojiGroup(iota)"
        print "\t" mkconst(groups[gi]), suffix
    }
    print ")\n"

    # Group table: display name plus the subgroup constants of each group.
    header = "// EmojiGroups is a list of all emoji groups.\n"
    header = header "var EmojiGroups = map[EmojiGroup]struct{\n"
    header = header "\tName      string\n"
    header = header "\tSubgroups []EmojiSubgroup\n"
    header = header "}{"
    print header
    for (gi in groups) {
        list  = ""
        width = 0
        for (si in subgroups[gi]) {
            ident  = mkconst(subgroups[gi][si])
            list   = list ident ", "
            width += length(ident)
            # Start a new line once about 75 characters of names were added.
            if (width >= 75) {
                list  = list "\n"
                width = 0
            }
        }
        printf "\t%s: {\"%s\", []EmojiSubgroup{\n%s}},\n", mkconst(groups[gi]), groups[gi], list
    }
    print "}\n"

    # Subgroup constants; only entry [1][1] carries the iota expression.
    print "// Emoji subgroups.\nconst ("
    for (gi in subgroups) {
        for (si in subgroups[gi]) {
            suffix = ""
            if (gi == 1 && si == 1)
                suffix = " = EmojiSubgroup(iota)"
            print "\t" mkconst(subgroups[gi][si]), suffix
        }
    }
    print ")\n"

    # Subgroup table: owning group constant plus display name.
    header = "// EmojiSubgroups is a list of all emoji subgroups.\n"
    header = header "var EmojiSubgroups = map[EmojiSubgroup]struct{\n"
    header = header "\tGroup EmojiGroup\n"
    header = header "\tName  string\n"
    header = header "}{"
    print header
    for (gi in subgroups) {
        for (si in subgroups[gi]) {
            printf "\t%s: {%s, \"%s\"},\n", mkconst(subgroups[gi][si]), mkconst(groups[gi]), subgroups[gi][si]
        }
    }
    print "}\n"

    # Emoji list: fill in the TONE and GENDER placeholders of every stored
    # struct literal; GENDER is always written as 0.
    print "var Emojis = []Emoji{"
    for (ei in emojis) {
        row = gensub("TONE", tones[ei], 1, emojis[ei])
        print gensub("GENDER", "0", 1, row)
    }
    print "}"
}

# mkconst turns a group or subgroup name into a Go constant name.
#
# Every "-" is removed and the character following it is upper-cased, " & "
# becomes "And", the first character is upper-cased, and "Emoji" is prefixed:
#
#   "face-smiling"      -> "EmojiFaceSmiling"
#   "Smileys & Emotion" -> "EmojiSmileysAndEmotion"
#
# s is the name to convert; i is a local variable.
function mkconst(s,     i) {
    # awk strings are indexed from 1. A start of 0 is out of range and, in awk
    # implementations that follow the POSIX definition, counts against the
    # length so that the character before the hyphen would be lost.
    while ((i = index(s, "-")) > 0)
        s = substr(s, 1, i-1) toupper(substr(s, i+1, 1)) substr(s, i+2)
    return "Emoji" toupper(substr(s, 1, 1)) substr(gensub(/ & /, "And", "g", s), 2)
}

# tochr converts an array of hexadecimal code points (without "0x" prefix) to
# the string made of those characters, leaving out the variation selector
# FE0F. The result is used as lookup key into cldr[].
#
# Every code point is converted on its own; joining the hex digits first and
# converting them as one number only works for a single code point.
#
# The elements are visited with "for (k in a)", so the character order follows
# PROCINFO["sorted_in"]. k and s are local variables.
function tochr(a,    k, s) {
    s = ""
    for (k in a)
        if (a[k] != "FE0F")
            s = s sprintf("%c", strtonum("0x" a[k]))
    return s
}

# readCLDR fills the global cldr[] array from .cache/en.xml.
#
# Every line containing "annotation " but not ' type="tts"' is split on "<",
# ">" and '"'. The third piece (the cp attribute value) becomes the key, and
# the fifth piece (the " | "-separated element text) is stored as a list of
# double-quoted strings separated by ", ".
#
# cmd, line and parts are local variables.
function readCLDR(    cmd, line, parts) {
    # TODO:
    # <annotation cp="&amp;">ampersand | and | et</annotation>
    # <annotation cp="&lt;">less than | less-than | open tag | tag</annotation>
    # <annotation cp="&gt;">close tag | greater than | greater-than | tag</annotation>
    # <annotation cp="🔣">〒♪&amp;% | input | input symbols</annotation>
    cmd = "grep 'annotation ' .cache/en.xml | grep -v ' type=\"tts\"'"
    # Parenthesised so the comparison applies to getline's return value.
    while ((cmd | getline line) > 0) {
        split(line, parts, /[<>"]/)
        cldr[parts[3]] = "\"" gensub(/ \| /, "\", \"", "g", parts[5]) "\""
    }
    # Release the pipe once all lines have been read.
    close(cmd)
}
